
<?php
/**
 * Created by PhpStorm.
 * User: ttl
 * Date: 2019/8/30
 * Time: 10:07
 */
require_once __DIR__ . '/autoloader.php';
use phpspider\core\phpspider;
use phpspider\core\requests;
use phpspider\core\selector;
use phpspider\core\db;

/* Do NOT delete this comment */
/* 不要删除这段注释 */

// Configuration array handed to the phpspider constructor.
$configs = [
    'name' => '一点资讯',
    'tasknum' => 1,
    //'save_running_state' => true,
    'log_show' => true,
    // Each host is listed once; the original listed www.yidianzixun.com twice.
    'domains' => [
        'www.yidianzixun.com',
        'ib11.go2yd.com',
    ],
    'scan_urls' => [
        "https://www.yidianzixun.com/",
    ],
    // NOTE(review): list and content patterns are identical, and the dots are
    // unescaped regex metacharacters — confirm this is intended.
    'list_url_regexes' => [
        "https://www.yidianzixun.com",
    ],
    'content_url_regexes' => [
        "https://www.yidianzixun.com",
    ],
    // NOTE(review): database credentials are hard-coded in source; consider
    // loading them from the environment or an untracked config file.
    'db_config' => [
        'host'  => '127.0.0.1',
        'port'  => 3306,
        'user'  => 'root',
        'pass'  => 'jbx666',
        'name'  => 'p_test',
        'input_encoding' => 'UTF-8',
        'output_encoding' => 'UTF-8',
    ],
    // No extraction fields are declared.
    'fields' => [],
];

// Build the spider from the configuration array; hooks are attached to this instance below.
$spider = new phpspider($configs);

/**
 * on_start hook.
 *
 * Installs request headers, fetches the "best" channel feed as JSON, and for
 * the first feed item that carries more than two images downloads those images
 * and dumps the article body of that item. Processing stops after that item.
 *
 * @param mixed $phpspider Spider instance supplied by the framework (unused here).
 */
$spider->on_start = function($phpspider)
{
    // Headers mimic an iPhone Safari XHR (see the user-agent and
    // x-requested-with values below).
    // NOTE(review): the cookie embeds a captured session id and will go stale.
    requests::set_header("cookie", "wuid=892920355750069; wuid_createAt=2020-04-18 22:29:00; JSESSIONID=7673ac10a38d3a3ee3930a2dcd0a01cf19572477b2987ef83c6f09ea31d8ceb3; Hm_lvt_15fafbae2b9b11d280c79eff3b840e45=1587220141,1587297793,1587356637");
    // The HTTP/2 pseudo-headers (":path", ":authority", ":method", ":scheme")
    // that used to be set here were removed. They are produced by the HTTP/2
    // layer from the request line and are not valid header field names, and the
    // fixed ":path" value did not match the URLs requested below.
    requests::set_header("referer","https://www.yidianzixun.com/");
    requests::set_header("sec-fetch-dest","empty");
    requests::set_header("sec-fetch-mode","cors");
    requests::set_header("sec-fetch-site","same-origin");
    requests::set_header("x-requested-with","XMLHttpRequest");
    requests::set_header("accept","*/*");
    requests::set_header("user-agent","Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1");

    echo 'on_start '."\n";

    $to_url = 'https://www.yidianzixun.com/home/q/news_list_for_channel?channel_id=best&cstart=0&cend=10&infinite=true&refresh=1&__from__=wap&docids=0PABEhQK%2C0P9muQxW%2C0P6A07d8%2C0P8LEWh2%2CT_00Uoq9H6&_spt=yz~eaodhoy~%3A%3B%3A&appid=web_yidian&_=1587356636389';

    $html = requests::get($to_url);
    $data = json_decode($html, true);

    // A failed request or a non-JSON body leaves nothing to iterate; fall back
    // to an empty list instead of looping over null.
    $result = (is_array($data) && isset($data['result']) && is_array($data['result']))
        ? $data['result']
        : [];

    $load_url_mode = 'https://www.yidianzixun.com/article/ITEMID?s=';
    $load_img_mode = 'https://ib11.go2yd.com/image.php?url=IMAGEID';

    // Besides the keys used below, the original code also read (but never used)
    // these keys of each feed item: category, title, source, like (like count),
    // date, dislike_reasons (tag array).
    foreach ($result as $v) {
        // Images shown together with the title.
        $image_urls = isset($v['image_urls']) ? $v['image_urls'] : null;
        var_dump(json_encode($image_urls));

        // Only items with more than two images and a usable id are processed.
        if (!is_array($image_urls) || count($image_urls) <= 2 || !isset($v['itemid'])) {
            continue;
        }

        foreach ($image_urls as $iv) {
            var_dump('$iv='.$iv);
            // e.g. https://ib11.go2yd.com/image.php?url=0P70PTlmHB
            down_load_image2(str_replace("IMAGEID",$iv,$load_img_mode),$iv);
        }

        $load_url = str_replace("ITEMID",$v['itemid'],$load_url_mode);
        var_dump('load_url='.$load_url);
        $load_data = requests::get($load_url);
        $load_data_html = selector::select($load_data, "//article[contains(@id,'js-article')]");
        var_dump($load_data_html);

        // Stop after the first qualifying item.
        break;
    }

    echo "get data <br>";
};



/**
 * on_list_page hook.
 *
 * Scans the page for anchor hrefs that contain both "/v?vid=" and
 * "tab=dongman", queues each one on the spider, fetches it and prints every
 * .mp4 URL found in the response.
 *
 * @param array  $page      Page descriptor; only $page['url'] is read.
 * @param string $content   Raw page body.
 * @param mixed  $phpspider Spider instance; add_url() is called on it.
 * @return bool Always false, telling the crawler not to discover further URLs
 *              from the current page.
 */
$spider->on_list_page = function($page, $content, $phpspider)
{
    echo 'im enter1 ='.$page['url']."<br>";

    $matches = [];
    $html = stripslashes($content);
    preg_match_all('/<a .*?href="(.*?)".*?>/is', $html, $matches);
    $hrefs = isset($matches[1]) ? $matches[1] : [];

    // Scheme and host of the current page, used to absolutise root-relative links.
    $base = parse_url($page['url']);
    $origin = '';
    if (is_array($base) && isset($base['scheme'], $base['host'])) {
        $origin = $base['scheme'].'://'.$base['host'].(isset($base['port']) ? ':'.$base['port'] : '');
    }

    foreach ($hrefs as $href) {
        // strpos() returns 0 for a match at the very start of the string, which
        // is falsy; compare against false explicitly so such links are kept.
        if (strpos($href, '/v?vid=') === false || strpos($href, 'tab=dongman') === false) {
            continue;
        }

        // A link such as "/v?vid=..." cannot be requested as-is; prefix the
        // origin of the page it was found on.
        if ($origin !== '' && strpos($href, '/') === 0 && strpos($href, '//') !== 0) {
            $href = $origin.$href;
        }

        $phpspider->add_url($href);

        $video_page = stripslashes(requests::get($href));
        $mp4_arr = [];
        $reg = '/((http|https):\/\/)+(\w+\.)+(\w+)[\w\/\.\-]*(mp4)/';
        preg_match_all($reg, $video_page, $mp4_arr);
        echo json_encode(isset($mp4_arr[0]) ? $mp4_arr[0] : []) . "\r\n";
        echo $href . "\r\n";
    }

    // Tell the crawler not to discover further URLs from the current page.
    return false;
};

/**
 * on_scan_page hook.
 *
 * Prints a marker line, requests the "best" channel feed, decodes it as JSON
 * and dumps the re-encoded result. The hook arguments are not used.
 *
 * @return bool Always false, telling the crawler not to discover further URLs
 *              from the current page.
 */
$spider->on_scan_page = function($page, $content, $phpspider) {
    echo 'abcdefg'."\n";

    $feed_url = 'https://www.yidianzixun.com/home/q/news_list_for_channel?channel_id=best&cstart=0&cend=10&infinite=true&refresh=1&__from__=wap&docids=0PABEhQK%2C0P9muQxW%2C0P6A07d8%2C0P8LEWh2%2CT_00Uoq9H6&_spt=yz~eaodhoy~%3A%3B%3A&appid=web_yidian&_=1587356636389';

    // Fetch and decode in one step; the decoded value is only dumped below.
    $feed = json_decode(requests::get($feed_url), true);

    echo "get data <br>";
    var_dump(json_encode($feed));

    // Tell the crawler not to discover further URLs from the current page.
    return false;
};


/**
 * on_content_page hook.
 *
 * Prints the URL of the page being handled and does nothing else.
 *
 * @param array $page Page descriptor; only $page['url'] is read.
 * @return bool Always false.
 */
$spider->on_content_page = function($page, $content, $phpspider)
{
    $current_url = $page['url'];
    echo 'this1=', $current_url;

    return false;
};

/**
 * on_extract_page hook.
 *
 * Prints the extracted data for inspection and hands it back unchanged.
 *
 * @param array $page Page descriptor (unused).
 * @param mixed $data Extracted data; presumably an array of fields — confirm
 *                    against the framework.
 * @return mixed The same $data. The original returned nothing.
 */
$spider->on_extract_page = function($page, $data)
{
    echo 'abbccddee';

    echo 'page=';
    // echo on an array prints the literal "Array" plus a warning; encode
    // non-scalar values so the output is actually readable.
    echo (is_array($data) || is_object($data))
        ? json_encode($data, JSON_UNESCAPED_UNICODE)
        : $data;

    return $data;
};


// Run the spider with the hooks registered above. The helper functions declared
// further down are already callable here because PHP hoists unconditional
// top-level function declarations.
$spider->start();

/**
 * Fetch $url with cURL and store the body as "<file_name>.mp4" inside $path.
 *
 * An existing file of the same name is overwritten. The original opened the
 * file in append mode, so a repeated download produced a corrupt, concatenated
 * file.
 *
 * @param string $url       Address to fetch.
 * @param string $file_name Base name of the target file, without extension.
 * @param string $path      Target directory including the trailing slash;
 *                          created (recursively) when missing.
 * @return string|false The file name written (relative to $path), or false when
 *                      the transfer, the directory creation or the write
 *                      failed. Nothing is written in that case.
 */
function download($url,$file_name, $path = 'video/')
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
    // Treat HTTP status >= 400 as a failure instead of saving the error page.
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    $file = curl_exec($ch);
    $error = curl_error($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; do not create a bogus file from it.
    if ($file === false) {
        trigger_error('download failed for '.$url.': '.$error, E_USER_WARNING);
        return false;
    }

    // The second is_dir() covers another process creating the directory
    // between the check and mkdir().
    if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
        trigger_error('download could not create directory '.$path, E_USER_WARNING);
        return false;
    }

    $filename = $file_name.'.mp4';
    if (file_put_contents($path . $filename, $file) === false) {
        trigger_error('download could not write '.$path.$filename, E_USER_WARNING);
        return false;
    }
    return $filename;
}

/**
 * Fetch $url with cURL and store the body as "<file_name>.jpg" inside $path.
 *
 * An existing file of the same name is overwritten. The original opened the
 * file in append mode, so a repeated download produced a corrupt, concatenated
 * file.
 *
 * @param string $url       Address to fetch.
 * @param string $file_name Base name of the target file, without extension.
 * @param string $path      Target directory including the trailing slash;
 *                          created (recursively) when missing.
 * @return string|false The file name written (relative to $path), or false when
 *                      the transfer, the directory creation or the write
 *                      failed. Nothing is written in that case.
 */
function down_load_image($url,$file_name, $path = 'image/')
{
    var_dump('$url='.$url);
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
    // Treat HTTP status >= 400 as a failure instead of saving the error page.
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    $file = curl_exec($ch);
    $error = curl_error($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; do not create a bogus file from it.
    if ($file === false) {
        trigger_error('down_load_image failed for '.$url.': '.$error, E_USER_WARNING);
        return false;
    }

    // The second is_dir() covers another process creating the directory
    // between the check and mkdir().
    if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
        trigger_error('down_load_image could not create directory '.$path, E_USER_WARNING);
        return false;
    }

    $filename = $file_name.'.jpg';
    if (file_put_contents($path . $filename, $file) === false) {
        trigger_error('down_load_image could not write '.$path.$filename, E_USER_WARNING);
        return false;
    }
    return $filename;
}

/**
 * Read $url with file_get_contents() and store the bytes as "<file_name>.jpg"
 * inside $path.
 *
 * An existing file of the same name is overwritten. The original opened the
 * file in append mode, so a repeated download produced a corrupt, concatenated
 * file. When the source cannot be read or the directory cannot be created,
 * nothing is written.
 *
 * @param string $url       Address (or local path) to read.
 * @param string $file_name Base name of the target file, without extension.
 * @param string $path      Target directory including the trailing slash;
 *                          created (recursively) when missing.
 * @return void
 */
function down_load_image2($url, $file_name,$path = 'images/')
{
    var_dump('$url='.$url);
    var_dump('$file_name='.$file_name);

    // The second is_dir() covers another process creating the directory
    // between the check and mkdir().
    if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
        trigger_error('down_load_image2 could not create directory '.$path, E_USER_WARNING);
        return;
    }

    $image = file_get_contents($url);
    // file_get_contents() has already raised a warning on failure; just avoid
    // leaving an empty .jpg behind.
    if ($image === false) {
        return;
    }

    if (file_put_contents($path . $file_name.".jpg", $image) === false) {
        trigger_error('down_load_image2 could not write '.$path.$file_name.'.jpg', E_USER_WARNING);
    }
}